## Prepare networks

# Prepare the networks of the selected sample:
# This script removes the individual observations (students), as well as the
# classes, that are excluded from the analysis because of missing values.
# It also adds a dummy variable to the data that identifies the observations
# excluded in this step.

# Load data:
# List of network objects; each element is later treated as one classroom.
list_networks <- readRDS("list_anonymous_networks.Rds")

# Ids of the students to drop from every network.
# NOTE: despite the .RData extension, this file is read as a single-object
# RDS file.
students_to_delete <-
  readRDS("intermediary_outputs/students_id_to_delete.RData")

# Getting the size of each network:
# vapply() always returns an integer vector (even for an empty list), and the
# explicit namespace avoids relying on the network package being attached.
original_length <- vapply(
  list_networks,
  function(net) length(network::network.vertex.names(net)),
  integer(1)
)

# Remove from a network every vertex whose name is a flagged id.
#
# Arguments:
#   network        A network object (package "network").
#   ids_to_delete  Vector of vertex names to remove. Defaults to the
#                  script-level `students_to_delete`, so existing calls of
#                  the form clean_network(network) behave as before.
#
# Returns:
#   The network returned by network::delete.vertices() after the matching
#   vertices have been removed. If no vertex name matches, an empty index
#   vector is passed on and no vertex is selected for deletion.
clean_network <- function(network, ids_to_delete = students_to_delete) {
  vertex_ids <- network::network.vertex.names(network)
  to_drop <- which(vertex_ids %in% ids_to_delete)

  # Capture the value returned by delete.vertices() and return it visibly.
  # NOTE(review): the package docs describe delete.vertices() as modifying its
  # argument in place and returning it invisibly - confirm for the installed
  # version of the network package.
  cleaned <- network::delete.vertices(network, to_drop)
  cleaned
}

# Drop the flagged students from every network.
list_networks_clean <- lapply(list_networks, clean_network)

# Size of each network after cleaning. vapply() guarantees an integer vector
# regardless of the input length.
new_length <- vapply(
  list_networks_clean,
  function(net) length(network::network.vertex.names(net)),
  integer(1)
)

# Side-by-side comparison of the sizes before and after the deletion.
cbind(original_length, new_length)

## Remove classrooms without vertices
# Number of vertices remaining in each cleaned network.
remaining_sizes <- vapply(
  list_networks_clean,
  function(net) length(network::network.vertex.names(net)),
  integer(1)
)

# Keep only the non-empty networks. Logical subsetting also works when the
# input list is empty (a 1:length() loop would iterate over c(1, 0) and fail).
# unname() keeps the result an unnamed, positionally indexed list, exactly as
# the previous element-by-element construction produced.
list_new <- unname(list_networks_clean[remaining_sizes != 0])

# Quick checks of the retained networks and their sizes.
summary(list_new)
vapply(
  list_new,
  function(net) length(network::network.vertex.names(net)),
  integer(1)
)

# NOTE: written in RDS format even though the file name ends in .RData;
# it must be read back with readRDS().
saveRDS(list_new, file = "intermediary_outputs/list_networks_clean.RData")

# Include the dummy variable indicating which observations were excluded
# in this stage.
# NOTE(review): `full_data` is not created anywhere in this script, so it must
# already exist in the session - confirm which upstream step provides it.
if (!exists("full_data")) {
  stop(
    "`full_data` must be loaded before flagging the excluded students.",
    call. = FALSE
  )
}

# 1 = student id is in `students_to_delete`, 0 = otherwise. A single
# vectorised assignment also works when `full_data` has zero rows.
full_data$flag_select_sample <- as.numeric(
  full_data$id_student %in% students_to_delete
)

write.csv(full_data, file = "intermediary_outputs/final_sample.csv")

# Clean environment:
# Remove every object listed by ls(), including names that start with a dot
# (all.names = TRUE), from the environment in which this script is evaluated.
# NOTE(review): this also removes objects created by other scripts sharing
# that environment - confirm this is intended when running a pipeline.
rm(list = ls(all.names = TRUE)) 
# Run a garbage collection after the objects have been removed.
gc()


  
